import os

import pandas as pd
import pymysql
from matplotlib import pyplot as plt
from wordcloud import WordCloud

# Set matplotlib's default sans-serif font to SimHei (the chart labels in
# this script contain Chinese text).
plt.rcParams.update({'font.sans-serif': ['SimHei']})

# Keyword arguments passed to pymysql.connect().
# Every setting except the charset can be overridden through a SPIDER_DB_*
# environment variable, so credentials do not have to be edited in (or
# committed with) this file. The defaults reproduce the previous hard-coded
# configuration, so behaviour is unchanged when no variable is set.
# NOTE(review): the fallback password is still stored in source; consider
# dropping that default once SPIDER_DB_PASSWORD is provided everywhere.
db_config = {
    'host': os.environ.get('SPIDER_DB_HOST', '127.0.0.1'),
    # The environment only carries strings; the port must be an int.
    'port': int(os.environ.get('SPIDER_DB_PORT', '3306')),
    'user': os.environ.get('SPIDER_DB_USER', 'root'),
    'password': os.environ.get('SPIDER_DB_PASSWORD', 'employment_analysis'),
    'database': os.environ.get('SPIDER_DB_NAME', 'spider'),
    'charset': 'utf8mb4',
}

# Load the `language` column of the `gitee` table into a DataFrame.
query = "select language from gitee"
conn = pymysql.connect(**db_config)
try:
    df = pd.read_sql(query, conn)
finally:
    # Release the connection even when the query raises, so a failed read
    # does not leave the connection open.
    conn.close()
# Data cleaning: drop rows whose language is missing, then count how many
# rows each language has.
df = df.dropna(subset=['language'])
language_counts = df['language'].value_counts()

def show_language_count():
    """Show a bar chart of the number of rows per language.

    Reads the module-level ``language_counts`` Series, draws it on a new
    10x6 inch figure and displays it with ``plt.show()``. Returns nothing.
    """
    fig = plt.figure(figsize=(10, 6))
    # With no explicit ``ax``, pandas draws on the current axes, i.e. on the
    # figure created just above, and returns those axes.
    axes = language_counts.plot(kind='bar')
    # Labels read "count per category", "category" and "count".
    axes.set_title('分类数量统计')
    axes.set_xlabel('分类')
    axes.set_ylabel('数量')
    # Turn the category names vertical.
    for tick_label in axes.get_xticklabels():
        tick_label.set_rotation(90)
    fig.tight_layout()
    plt.show()

def show_wordcloud():
    """Show a word cloud in which each language is sized by its row count.

    Reads the module-level ``df`` and displays the image with
    ``plt.show()``. Returns nothing.

    The cloud is built from per-language frequencies instead of from a
    space-joined string. ``WordCloud.generate(text)`` tokenizes its input:
    it keeps only word characters, so names such as "C++", "C#" or
    "Objective-C" are truncated or dropped and multi-word names are split
    apart. It also filters stopwords and can merge adjacent words into
    bigrams. All of that makes the rendered sizes disagree with the real
    counts. ``generate_from_frequencies`` uses the names and counts as-is.
    """
    # Map each language name to the number of rows that carry it.
    frequencies = df['language'].value_counts().to_dict()

    cloud = WordCloud(
        width=800,
        height=400,
        background_color='white',
        font_path='simhei.ttf',  # font file able to render Chinese characters
    ).generate_from_frequencies(frequencies)

    # Display the rendered image without axes.
    plt.figure(figsize=(10, 5))
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis('off')
    plt.title('Language Word Cloud')
    plt.show()

# Render the bar chart first, then the word cloud.
for _render in (show_language_count, show_wordcloud):
    _render()